TODO:

  • Filter out cells pct_mt > 20%
  • Visualize markers proposed by Ferran (dot plot + UMAP of specific oness)
  • Might be worth to run monocle/PAGA + phateR + velocyto
  • Find subclusters for “Richter-like” clusters

1 Introduction

Here we will exclude the poor-quality clusters and erythrocytes that we identified previously. Subsequently, we will reprocess each patient-specific Seurat object including feature selection, principal component analysis (PCA) and UMAP.

2 Pre-processing

2.1 Load packages

library(Seurat)
library(ggpubr)
library(plotly)
library(tidyverse)

2.2 Parameters

# Paths
path_to_obj <- here::here("results/R_objects/6.seurat_list_annotated.rds")
path_to_save <- here::here("results/R_objects/patient_63/7.seurat_list_annotated_reprocessed.rds")


# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
                   "blue", "mediumorchid2", "coral2", "blueviolet",
                   "indianred4", "deepskyblue1", "dimgray", "deeppink1",
                   "green", "lightgray", "hotpink1")

# QC thresholds
max_pct_mt <- 20


# Functions
source(here::here("bin/utils.R"))

2.3 Load data

seurat_list <- readRDS(path_to_obj)
seurat_list
## $`12`
## An object of class Seurat 
## 23403 features across 6212 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`19`
## An object of class Seurat 
## 23403 features across 9047 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
## 
## $`3299`
## An object of class Seurat 
## 23403 features across 6546 samples within 1 assay 
## Active assay: RNA (23403 features, 2000 variable features)
##  3 dimensional reductions calculated: pca, harmony, umap
purrr::map(seurat_list, DimPlot, cols = color_palette)
## $`12`

## 
## $`19`

## 
## $`3299`

3 Exclude clusters

exclude_clusters <- list(
  "12" = c("poor-quality", "erythrocytes"),
  "19" = c("poor-quality", "erythrocytes"),
  "3299" = c("poor-quality", "erythroblast")
)
seurat_list <- purrr::map2(seurat_list, exclude_clusters, function(seurat_obj, x) {
  selected_cells <- colnames(seurat_obj)[!(seurat_obj$annotation %in% x)]
  seurat_obj <- subset(seurat_obj, cells = selected_cells)
  seurat_obj$annotation <- droplevels(seurat_obj$annotation)
  seurat_obj
})

In addition, we have seen that we were too permissive with the threshold of mitochondrial expression, since poor-quality cells were clustering together. Thus, we will apply a new cutoff:

seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
  seurat_obj <- subset(seurat_obj, subset = pct_mt <= max_pct_mt)
  seurat_obj
})

4 Reprocess

seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
  seurat_obj <- seurat_obj %>%
    FindVariableFeatures() %>%
    ScaleData() %>%
    RunPCA() %>%
    RunUMAP(dims = 1:20, reduction = "pca")
  seurat_obj
})

UMAPs:

umaps_annotation <- purrr::map(
  seurat_list,
  DimPlot,
  cols = color_palette, pt.size = 0.8
)
umaps_annotation
## $`12`

## 
## $`19`

## 
## $`3299`

umaps_n_features <- purrr::map(seurat_list, function(seurat_obj) {
  p <- FeaturePlot(seurat_obj, "nFeature_RNA", pt.size = 0.8) +
    scale_color_viridis_c(option = "magma")
})
umaps_n_features
## $`12`

## 
## $`19`

## 
## $`3299`

5 Recluster

seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
  seurat_obj <- FindNeighbors(seurat_obj, dims = 1:20)
  seurat_obj
})
resolutions <- c(
  "12" = 0.3,
  "19" = 0.4,
  "3299" = 0.3
)
seurat_list <- purrr::map2(seurat_list, resolutions, function(seurat_obj, res) {
  seurat_obj <- FindClusters(seurat_obj, resolution = res)
  seurat_obj
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 5913
## Number of edges: 207531
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8778
## Number of communities: 6
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 7844
## Number of edges: 259934
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8253
## Number of communities: 6
## Elapsed time: 1 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6076
## Number of edges: 211530
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8479
## Number of communities: 4
## Elapsed time: 0 seconds
umaps_clusters <- purrr::map(
  seurat_list,
  DimPlot,
  cols = color_palette,
  pt.size = 0.75
)
umaps_clusters
## $`12`

## 
## $`19`

## 
## $`3299`

6 Markers

markers_list <- purrr::map(
  seurat_list,
  FindAllMarkers,
  only.pos = TRUE,
  logfc.threshold = 0.75
)
markers_list <- purrr::map(markers_list, function(df) {
  df <- df %>%
    group_by(cluster) %>%
    arrange(desc(avg_log2FC), .by_group = TRUE)
  df[, c(7, 6, 2, 5, 3, 4)]
})

7 Reannotate

7.1 12

DT::datatable(markers_list$`12`)
Cluster Markers Annotation
0 CXCR4 CXCR4+CD27-
1 CD27 CXCR4-CD27+
2 MIF, CCND2 Richter-like quiescent
3 MIR155HG, LRMP MIR155HG+
4 TOP2A, MKI67 Richter-like proliferative
5 MZB1, IGHM, XBP1, CLPTM1L MZB1+IGHM++XBP1+
seurat_list$`12`$annotation2 <- seurat_list$`12`$seurat_clusters
new_levels_12 <- c("CXCR4+CD27-", "CXCR4-CD27+", "Richter-like quiescent",
                   "MIR155HG+", "Richter-like proliferative", "MZB1+IGHM++XBP1+")
levels(seurat_list$`12`$annotation2) <- new_levels_12
Idents(seurat_list$`12`) <- "annotation2"
DimPlot(seurat_list$`12`, cols = color_palette, pt.size = 0.75)

feat_plot_12 <- purrr::map(c("CXCR4", "CD27"), function(x) {
  p <- FeaturePlot(seurat_list$`12`, features = x, pt.size = 0.75) +
    scale_color_viridis_c(option = "magma")
  p
})
feat_plot_12
## [[1]]

## 
## [[2]]

Interestingly, CLL cells seem to follow a trajectory in 3D:

seurat_list$`12` <- RunUMAP(seurat_list$`12`, dims = 1:20, n.components = 3, reduction.name = "umap_3D")
umap_12_df <- as.data.frame(seurat_list$`12`@reductions$umap_3D@cell.embeddings)
umap_12_df$annotation2 <- seurat_list$`12`$annotation2
umap_12_3D <- plot_ly(
  umap_12_df,
  x = ~umap_3d_1,
  y = ~umap_3d_2,
  z = ~umap_3d_3,
  color = ~factor(annotation2),
  colors = color_palette,
  size = 0.001,
  alpha = 0.4
)
umap_12_3D <- umap_12_3D %>%
  add_markers()
umap_12_3D
# Expression MIR155HG
umap_12_df$MIR155HG <- seurat_list$`12`[["RNA"]]@data["MIR155HG", ]
umap_12_mir155 <- plot_ly(
  umap_12_df,
  x = ~umap_3d_1,
  y = ~umap_3d_2,
  z = ~umap_3d_3,
  color = ~MIR155HG,
  size = 0.001,
  alpha = 0.7
)
umap_12_mir155 <- umap_12_mir155 %>%
  add_markers()
umap_12_mir155

Additional markers:

selected_markers <- c("CD5", "CD19", "MS4A1", "CD38", "PTPRC", "NCAM1")
dot_plot_12 <- DotPlot(
  seurat_list$`12`,
  features = selected_markers,
  group.by = "annotation2"
) +
  labs(x = "", y = "") +
  scale_color_viridis_c(option = "magma")
dot_plot_12

Subcluster Richter clones:

seurat_list$`12` <- FindSubCluster(
  seurat_list$`12`,
  graph.name = "RNA_snn",
  cluster = "Richter-like quiescent",
  subcluster.name = "richter_subclusters",
  resolution = 0.4
)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 665
## Number of edges: 26825
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7241
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(seurat_list$`12`, group.by = "richter_subclusters")

Idents(seurat_list$`12`) <- "richter_subclusters"
markers_richter_subclusters <- FindMarkers(
  seurat_list$`12`,
  ident.1 = "Richter-like quiescent_0",
  ident.2 = "Richter-like quiescent_1",
  only.pos = FALSE
)
markers_richter_subclusters <- markers_richter_subclusters %>%
  rownames_to_column("gene") %>%
  arrange(desc(avg_log2FC))
DT::datatable(markers_richter_subclusters)
feat_plot_ccnd2 <- FeaturePlot(
  seurat_list$`12`,
  features = "CCND2",
  pt.size = 0.75
) +
  scale_color_viridis_c(option = "magma")
feat_plot_ccnd2

# Final annotation
seurat_list$`12`$annotation2 <- seurat_list$`12`$richter_subclusters
seurat_list$`12`$richter_subclusters <- NULL
Idents(seurat_list$`12`) <- "annotation2"
ccnd_pos <- levels(Idents(seurat_list$`12`)) == "Richter-like quiescent_0"
levels(Idents(seurat_list$`12`))[ccnd_pos] <- "CCND2+ Richter-like quiescent"
ccnd_neg <- levels(Idents(seurat_list$`12`)) == "Richter-like quiescent_1"
levels(Idents(seurat_list$`12`))[ccnd_neg] <- "CCND2- Richter-like quiescent"
umap_final_annotation_12 <- DimPlot(
  seurat_list$`12`,
  pt.size = 0.75,
  cols = color_palette
)
umap_final_annotation_12

7.2 19

DT::datatable(markers_list$`19`)

Interesting paper on S100A4 and Richter transformation.

Cluster Markers Annotation
0 CD27 CXCR4-CD27+
1 CXCR4 CXCR4+CD27-
2 BIRC3 Richter-like quiescent
3 MIR155HG, MS4A1 MIR155HG+
4 NA poor-quality
5 TOP2A, MKI67 Richter-like proliferative

Visualize markers:

selected_markers_19 <- c("CXCR4", "CD27", "TP53INP1", "BIRC3", "INPP5D",
                         "UVRAG", "PIK3IP1", "MIR155HG", "MS4A1")
feat_plot_19 <- purrr::map(selected_markers_19, function(x) {
  p <- FeaturePlot(seurat_list$`19`, features = x, pt.size = 0.75) +
    scale_color_viridis_c(option = "magma")
  p
})
feat_plot_19
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

# Reannotate
seurat_list$`19`$annotation2 <- seurat_list$`19`$seurat_clusters
new_levels_19 <- c("CXCR4-CD27+", "CXCR4+CD27-", "Richter-like quiescent",
                   "MIR155HG+", "poor-quality", "Richter-like proliferative") 
levels(seurat_list$`19`$annotation2) <- new_levels_19
Idents(seurat_list$`19`) <- "annotation2"
DimPlot(seurat_list$`19`, cols = color_palette, pt.size = 0.75)

3D trajectory:

seurat_list$`19` <- RunUMAP(
  seurat_list$`19`,
  dims = 1:20,
  n.components = 3,
  reduction.name = "umap_3D"
)
umap_19_df <- as.data.frame(seurat_list$`19`@reductions$umap_3D@cell.embeddings)
umap_19_df$annotation2 <- seurat_list$`19`$annotation2
umap_19_3D <- plot_ly(
  umap_19_df,
  x = ~umap_3d_1,
  y = ~umap_3d_2,
  z = ~umap_3d_3,
  color = ~factor(annotation2),
  colors = color_palette,
  size = 0.001,
  alpha = 0.4
)
umap_19_3D <- umap_19_3D %>%
  add_markers()
umap_19_3D
# Expression MIR155HG
umap_19_df$MIR155HG <- seurat_list$`19`[["RNA"]]@data["MIR155HG", ]
umap_19_mir155 <- plot_ly(
  umap_19_df,
  x = ~umap_3d_1,
  y = ~umap_3d_2,
  z = ~umap_3d_3,
  color = ~MIR155HG,
  size = 0.001,
  alpha = 0.7
)
umap_19_mir155 <- umap_19_mir155 %>%
  add_markers()
umap_19_mir155

7.3 3299

8 Save

saveRDS(seurat_list, path_to_save)

9 Session Information

sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=es_ES.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=es_ES.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=es_ES.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] forcats_0.5.1      stringr_1.4.0      dplyr_1.0.6        purrr_0.3.4        readr_1.4.0        tidyr_1.1.3        tibble_3.1.2       tidyverse_1.3.0    plotly_4.9.3       ggpubr_0.4.0       ggplot2_3.3.3      SeuratObject_4.0.1 Seurat_4.0.1       BiocStyle_2.18.1  
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.3.1          backports_1.2.1       plyr_1.8.6            igraph_1.2.6          lazyeval_0.2.2        splines_4.0.4         crosstalk_1.1.1       listenv_0.8.0         scattermore_0.7       digest_0.6.27         htmltools_0.5.1.1     fansi_0.4.2           magrittr_2.0.1        tensor_1.5            cluster_2.1.1         ROCR_1.0-11           limma_3.46.0          openxlsx_4.2.3        globals_0.14.0        modelr_0.1.8          matrixStats_0.58.0    spatstat.sparse_2.0-0 colorspace_2.0-1      rvest_1.0.0           ggrepel_0.9.1         haven_2.3.1           xfun_0.22             crayon_1.4.1          jsonlite_1.7.2        spatstat.data_2.1-0   survival_3.2-10       zoo_1.8-9             glue_1.4.2            polyclip_1.10-0       gtable_0.3.0          leiden_0.3.7          car_3.0-10            future.apply_1.7.0    abind_1.4-5           scales_1.1.1          DBI_1.1.1             rstatix_0.7.0         miniUI_0.1.1.1        Rcpp_1.0.6            viridisLite_0.4.0     xtable_1.8-4          reticulate_1.20       spatstat.core_2.1-2   foreign_0.8-81        DT_0.17               htmlwidgets_1.5.3     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2       
##  [55] ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3       sass_0.4.0            uwot_0.1.10           dbplyr_2.1.0          deldir_0.2-10         here_1.0.1            utf8_1.2.1            labeling_0.4.2        tidyselect_1.1.1      rlang_0.4.11          reshape2_1.4.4        later_1.2.0           munsell_0.5.0         cellranger_1.1.0      tools_4.0.4           cli_2.5.0             generics_0.1.0        broom_0.7.5           ggridges_0.5.3        evaluate_0.14         fastmap_1.1.0         yaml_2.2.1            goftest_1.2-2         fs_1.5.0              knitr_1.31            fitdistrplus_1.1-3    zip_2.1.1             RANN_2.6.1            pbapply_1.4-3         future_1.21.0         nlme_3.1-152          mime_0.10             xml2_1.3.2            rstudioapi_0.13       compiler_4.0.4        curl_4.3.1            png_0.1-7             ggsignif_0.6.1        spatstat.utils_2.1-0  reprex_1.0.0          bslib_0.2.5           stringi_1.6.2         highr_0.8             RSpectra_0.16-0       lattice_0.20-41       Matrix_1.3-3          vctrs_0.3.8           pillar_1.6.1          lifecycle_1.0.0       BiocManager_1.30.10   spatstat.geom_2.1-0   lmtest_0.9-38        
## [109] jquerylib_0.1.4       RcppAnnoy_0.0.18      data.table_1.14.0     cowplot_1.1.1         irlba_2.3.3           httpuv_1.6.1          patchwork_1.1.1       R6_2.5.0              bookdown_0.21         promises_1.2.0.1      KernSmooth_2.23-18    gridExtra_2.3         rio_0.5.26            parallelly_1.25.0     codetools_0.2-18      MASS_7.3-53.1         assertthat_0.2.1      rprojroot_2.0.2       withr_2.4.2           sctransform_0.3.2     mgcv_1.8-34           parallel_4.0.4        hms_1.0.0             grid_4.0.4            rpart_4.1-15          rmarkdown_2.7         carData_3.0-4         Rtsne_0.15            shiny_1.6.0           lubridate_1.7.10